########################################################## #
###   R code to analyze data for
###   'Who Do You Loathe? Feelings toward Politicians vs.
###   Ordinary People in the Opposing Party'
###   by Kingzette, Jon
########################################################## #
###   This code will produce all statistics, tables, and figures 
###   in paper & appendix
########################################################## #
###   Code written with R version 3.6.2


### SETUP ###

# Install/update packages (if necessary)
# install.packages("tidyverse")
# install.packages("stargazer")

# Load packages
library(tidyverse)
library(stargazer)

# Read the raw Lucid survey export
lucid <- read.csv("lucid_data_order.csv")

# Keep only respondents who completed the survey
lucid <- lucid[which(lucid$Finished == "TRUE"), ]

# Analyses are performed on partisans only, so the sample is restricted here.
# Note: The corresponding levels of the political_party variable can be
# found in the supplementary file titled "Lucid_codebook"
partisan_codes <- c(1, 2, 3, 5, 6, 8, 9, 10)
partisans <- subset(lucid, political_party %in% partisan_codes)



### Variable creation ###

# Party indicator variables (1/0), with leaners counted in their party:
# Republican = political_party codes 5, 8, 9, 10
# Democrat   = political_party codes 1, 2, 3, 6
partisans$rep <- as.numeric(partisans$political_party %in% c(5, 8, 9, 10))

partisans$dem <- as.numeric(partisans$political_party %in% c(1, 2, 3, 6))

# Central measures -- partisans' feelings toward their out- and in-group:
# 1) party; 2) politicians; 3) ordinary citizens.
# For respondents with dem == 1 the "_2" items are taken as the out-group
# ratings and the "_1" items as the in-group ratings; for all other
# respondents the assignment is reversed.
is_dem <- partisans$dem == 1

# Out-group ratings
partisans$out_party <- ifelse(is_dem,
                              partisans$anes_ft_2, partisans$anes_ft_1)
partisans$out_pols <- ifelse(is_dem,
                             partisans$elites_2, partisans$elites_1)
partisans$out_cits <- ifelse(is_dem,
                             partisans$citizens_2, partisans$citizens_1)

# In-group ratings
partisans$in_party <- ifelse(is_dem,
                             partisans$anes_ft_1, partisans$anes_ft_2)
partisans$in_pols <- ifelse(is_dem,
                            partisans$elites_1, partisans$elites_2)
partisans$in_cits <- ifelse(is_dem,
                            partisans$citizens_1, partisans$citizens_2)

# Within-subject differences between the thermometer items.
# Out-party versions:
# 1) polparty = FT politicians - FT party
# 2) citparty = FT ordinary people - FT party
# 3) citpols  = FT ordinary people - FT politicians
partisans$polparty <- with(partisans, out_pols - out_party)
partisans$citparty <- with(partisans, out_cits - out_party)
partisans$citpols <- with(partisans, out_cits - out_pols)

# In-party versions (same three contrasts, "in" prefix):
# 1) inpolparty = FT politicians - FT party
# 2) incitparty = FT ordinary people - FT party
# 3) incitpols  = FT ordinary people - FT politicians
partisans$inpolparty <- with(partisans, in_pols - in_party)
partisans$incitparty <- with(partisans, in_cits - in_party)
partisans$incitpols <- with(partisans, in_cits - in_pols)

# Measures of "in-group bias": the in-group rating minus the out-group rating,
# computed once from the party thermometers and once from the thermometers
# about ordinary partisans. in_party/out_party and in_cits/out_cits already
# hold the "_1"/"_2" items assigned by party, so subtracting them gives
# anes_ft_1 - anes_ft_2 for Democrats and anes_ft_2 - anes_ft_1 otherwise
# (and likewise for the citizens items).
partisans$bias_party <- partisans$in_party - partisans$out_party

partisans$bias_cits <- partisans$in_cits - partisans$out_cits

# Covariates for the analysis at the end of the appendix

# Ideology on a 1-7 scale: the position of the response label in this
# ordered vector is its score; any other (or missing) label becomes NA
ideo_levels <- c("Very liberal",
                 "Liberal",
                 "Somewhat liberal",
                 "Moderate, middle of the road",
                 "Somewhat conservative",
                 "Conservative",
                 "Very conservative")
partisans$ideo <- as.numeric(match(partisans$ideo_id, ideo_levels))

# 1 when both ethnicity and hispanic are coded 1, else 0
partisans$white <- as.numeric(partisans$ethnicity == 1 &
                                partisans$hispanic == 1)

# Strength of partisanship: codes 1 and 10 are strong partisans;
# codes 3, 5, 6, and 8 are leaners
partisans$strong <- as.numeric(partisans$political_party %in% c(1, 10))

partisans$leaner <- as.numeric(partisans$political_party %in% c(3, 5, 6, 8))

# 1 when gender is coded 2, else 0
partisans$female <- as.numeric(partisans$gender == 2)

# Splitting the partisans into one data set per party
republicans <- partisans[which(partisans$rep == 1), ]
democrats <- partisans[which(partisans$dem == 1), ]



### Test of statistical power in Introduction ###

# Because there are fewer Reps than Dems in this sample, the power of a
# paired-sample t-test is evaluated with n = number of Republicans and
# Cohen's d = .2 (small effect). delta and sd are arbitrary values chosen
# so that delta / sd = 1 / 5 = .2
n_pairs <- nrow(republicans)
power.t.test(n = n_pairs,
             delta = 1, sd = 5,
             sig.level = 0.05,
             type = "paired",
             alternative = "two.sided")



### Main Analyses in Paper ###

# Figures 1 and 2: Show the mean within-subject differences broken out by party
# for each of the thermometer items toward the *out-group*
# Note that dimensions of figures were manually adjusted to fit 
# in the Latex document.

# Figure 1: Within-subject differences in Democrats'
# thermometer scores toward out-group party, pols, and cits

# Sample size and t critical value are derived from the data instead of
# being hard-coded, so the intervals stay correct if the sample changes
n_dems <- nrow(democrats)
t_crit <- qt(.975, df = n_dems - 1)

# Standard errors for plot
se1 <- sd(democrats$citparty) / sqrt(n_dems)
se2 <- sd(democrats$polparty) / sqrt(n_dems)
se3 <- sd(democrats$citpols) / sqrt(n_dems)

# Creating plotting matrix of means and 95% confidence-interval half-widths
plotting_dems <- data.frame(diffs = c("Ordinary Reps. - Rep. Party",
                                      "Rep. Politicians - Rep. Party",
                                      "Ordinary Reps. - Rep. Politicians"),
                            means = c(mean(democrats$citparty),
                                      mean(democrats$polparty),
                                      mean(democrats$citpols)),
                            ci = t_crit * c(se1, se2, se3))

# Creating plot: point estimates with error bars and a dashed line at zero
ggplot(plotting_dems, aes(x=as.factor(diffs), y = means)) +
  geom_point() +
  geom_errorbar(aes(ymin = means - ci, ymax = means + ci, width = .1)) +
  geom_hline(yintercept=0, linetype = "dashed") +
  scale_y_continuous(breaks = c(-4:13),
                     limits = c(-4,13)) +
  labs(title = " ",
       x = " ",
       y = "Means and 95% Confidence Intervals")+
  theme_bw() +
  theme(axis.text.x = element_text(size=12))


# Figure 2: Within-subject differences in Republicans'
# thermometer scores toward out-group party, pols, and cits

# Sample size is derived from the data. The t critical value uses
# df = n - 1 (previously df was set equal to the hard-coded n of 399)
n_reps <- nrow(republicans)
t_crit <- qt(.975, df = n_reps - 1)

# Standard errors for plot
se1 <- sd(republicans$citparty) / sqrt(n_reps)
se2 <- sd(republicans$polparty) / sqrt(n_reps)
se3 <- sd(republicans$citpols) / sqrt(n_reps)

# Creating plotting matrix of means and 95% confidence-interval half-widths
plotting_reps <- data.frame(diffs = c("Ordinary Dems. - Dem. Party",
                                      "Dem. Politicians - Dem. Party",
                                      "Ordinary Dems. - Dem. Politicians"),
                            means = c(mean(republicans$citparty),
                                      mean(republicans$polparty),
                                      mean(republicans$citpols)),
                            ci = t_crit * c(se1, se2, se3))

# Creating plot; scale_x_discrete fixes the left-to-right order of contrasts
ggplot(plotting_reps, aes(x=as.factor(diffs), y = means)) +
  geom_point() +
  geom_errorbar(aes(ymin = means - ci, ymax = means + ci, width = .1)) +
  geom_hline(yintercept=0, linetype = "dashed") +
  scale_y_continuous(breaks = c(-4:13),
                     limits = c(-4,13)) +
  labs(title = " ",
       x = " ",
       y = "Means and 95% Confidence Intervals") +
  theme_bw() +
  scale_x_discrete(limits = c("Ordinary Dems. - Dem. Party",
                              "Ordinary Dems. - Dem. Politicians",
                              "Dem. Politicians - Dem. Party"),
                   labels = c("Ordinary Dems. - Dem. Party",
                              "Ordinary Dems. - Dem. Politicians",
                              "Dem. Politicians - Dem. Party"))+
  theme(axis.text.x = element_text(size=12))


# Corresponding t-tests for Figures 1 and 2
# Each call is a one-sample t-test on a within-subject difference score, so
# it tests whether the mean difference departs from zero (t.test's default
# mu = 0).

# Democrats (differences among ratings of out-group targets)
t.test(democrats$citpols)
t.test(democrats$citparty)
t.test(democrats$polparty)

# Republicans (differences among ratings of out-group targets)
t.test(republicans$citpols)
t.test(republicans$citparty)
t.test(republicans$polparty)


# Raw (undifferenced) mean thermometer scores toward the out-group's
# ordinary citizens, party, and politicians
# Reported in footnote 4 in manuscript
with(democrats, mean(out_cits))
with(democrats, mean(out_party))
with(democrats, mean(out_pols))

with(republicans, mean(out_cits))
with(republicans, mean(out_party))
with(republicans, mean(out_pols))


# Figures 3 and 4: Show the mean within-subject differences broken out by party
# for each of the thermometer items toward the *out-group*, showing the lack of
# significant order effects
# Note that dimensions of figures were manually adjusted to fit 
# in the Latex document.

# Figure 3: Shows within-subject differences in Democrats' feelings toward
# out-group party, pols, and cits for each order these thermometer items were
# presented; citizens_order is the order variable

# Mean and 95% CI half-width for one vector of difference scores. The
# standard error and degrees of freedom are based on the number of
# observations in `x` itself (the question-order subgroup), not on the size
# of the full Democratic sample.
mean_ci <- function(x) {
  n <- length(x)
  c(mean = mean(x), ci = qt(.975, df = n - 1) * sd(x) / sqrt(n))
}

# Means and CI half-widths of the three contrasts for one question order;
# returns a 3 x 2 matrix (rows: citparty, polparty, citpols)
dems_order_stats <- function(ord) {
  keep <- democrats$citizens_order == ord
  rbind(mean_ci(democrats$citparty[keep]),
        mean_ci(democrats$polparty[keep]),
        mean_ci(democrats$citpols[keep]))
}
order_stats <- rbind(dems_order_stats(1), dems_order_stats(2))

# Creating plotting matrix of means and confidence intervals
# (rows 1-3: order 1; rows 4-6: order 2)
plotting_dems2 <- data.frame(diffs = rep(c("Ordinary Reps. - Rep. Party",
                                           "Rep. Politicians - Rep. Party",
                                           "Ordinary Reps. - Rep. Politicians"),
                                         times = 2),
                             means = order_stats[, "mean"],
                             ci = order_stats[, "ci"],
                             row.names = NULL)
plotting_dems2$order <- c(1,1,1,2,2,2)

# Creating plot: the two question orders are dodged side by side within each
# contrast and distinguished by point shape
ggplot(plotting_dems2, aes(x=as.factor(diffs), y = means, group = as.factor(order))) +
  geom_point(aes(shape = as.factor(order)), position=position_dodge(width=0.3)) +
  geom_errorbar(aes(ymin = means - ci, ymax = means + ci, width = .1),
                position=position_dodge(width=0.3)) +
  geom_hline(yintercept=0, linetype = "dashed") +
  scale_y_continuous(breaks = c(-4:13),
                     limits = c(-4,13)) +
  labs(title = " ",
       x = " ",
       y = "Means and 95% Confidence Intervals",
       shape = "Question Order") +
  scale_shape_discrete(labels = c("Ordinary first", "Politicians first"))+
  theme_bw() +
  theme(axis.text.x = element_text(size=11))


# Figure 4: Shows within-subject differences in Republicans' feelings toward
# out-group party, pols, and cits for each order these thermometer items were
# presented

# Mean and 95% CI half-width for one vector of difference scores. The
# standard error and degrees of freedom are based on the number of
# observations in `x` itself (the question-order subgroup), not on the size
# of the full Republican sample, and df is n - 1.
mean_ci <- function(x) {
  n <- length(x)
  c(mean = mean(x), ci = qt(.975, df = n - 1) * sd(x) / sqrt(n))
}

# Means and CI half-widths of the three contrasts for one question order;
# returns a 3 x 2 matrix (rows: citparty, polparty, citpols)
reps_order_stats <- function(ord) {
  keep <- republicans$citizens_order == ord
  rbind(mean_ci(republicans$citparty[keep]),
        mean_ci(republicans$polparty[keep]),
        mean_ci(republicans$citpols[keep]))
}
order_stats <- rbind(reps_order_stats(1), reps_order_stats(2))

# Creating plotting matrix of means and confidence intervals
# (rows 1-3: order 1; rows 4-6: order 2)
plotting_reps2 <- data.frame(diffs = rep(c("Ordinary Dems. - Dem. Party",
                                           "Dem. Politicians - Dem. Party",
                                           "Ordinary Dems. - Dem. Politicians"),
                                         times = 2),
                             means = order_stats[, "mean"],
                             ci = order_stats[, "ci"],
                             row.names = NULL)
plotting_reps2$order <- c(1,1,1,2,2,2)

# Creating plot: the two question orders are dodged side by side within each
# contrast; scale_x_discrete fixes the left-to-right order of contrasts
ggplot(plotting_reps2, aes(x=as.factor(diffs), y = means, group = as.factor(order))) +
  geom_point(aes(shape = as.factor(order)), position=position_dodge(width=0.3)) +
  geom_errorbar(aes(ymin = means - ci, ymax = means + ci, width = .1),
                position=position_dodge(width=0.3)) +
  geom_hline(yintercept=0, linetype = "dashed") +
  scale_y_continuous(breaks = c(-4:13),
                     limits = c(-4,13)) +
  labs(title = " ",
       x = " ",
       y = "Means and 95% Confidence Intervals",
       shape = "Question Order") +
  scale_shape_discrete(labels = c("Ordinary first", "Politicians first"))+
  theme_bw() +
  scale_x_discrete(limits = c("Ordinary Dems. - Dem. Party",
                              "Ordinary Dems. - Dem. Politicians",
                              "Dem. Politicians - Dem. Party"),
                   labels = c("Ordinary Dems. - Dem. Party",
                              "Ordinary Dems. - Dem. Politicians",
                              "Dem. Politicians - Dem. Party"))+
  theme(axis.text.x = element_text(size=11))


# Corresponding t-tests for Figures 3 and 4 (testing order effects)
# Each call is a two-sample t-test comparing one difference score between
# respondents with citizens_order == 1 and those with citizens_order == 2.
# var.equal is left at t.test's default (FALSE), i.e. Welch's test.

# Democrats
t.test(democrats$polparty[democrats$citizens_order == 1],
       democrats$polparty[democrats$citizens_order == 2])
t.test(democrats$citparty[democrats$citizens_order == 1],
       democrats$citparty[democrats$citizens_order == 2]) 
t.test(democrats$citpols[democrats$citizens_order == 1],
       democrats$citpols[democrats$citizens_order == 2])

# Republicans
t.test(republicans$polparty[republicans$citizens_order == 1],
       republicans$polparty[republicans$citizens_order == 2])
t.test(republicans$citparty[republicans$citizens_order == 1],
       republicans$citparty[republicans$citizens_order == 2]) 
t.test(republicans$citpols[republicans$citizens_order == 1],
       republicans$citpols[republicans$citizens_order == 2])



# Figures 5 and 6: Show the mean within-subject differences broken out by party
# for each of the thermometer items toward the *in-group*
# Note that dimensions of figures were manually adjusted to fit 
# in the Latex document.

# Figure 5: Shows within-subject differences in Democrats' feelings toward
# in-group party, pols, and cits

# Sample size and t critical value are derived from the data instead of
# being hard-coded, so the intervals stay correct if the sample changes
n_dems <- nrow(democrats)
t_crit <- qt(.975, df = n_dems - 1)

# Standard errors
se1 <- sd(democrats$incitparty) / sqrt(n_dems)
se2 <- sd(democrats$inpolparty) / sqrt(n_dems)
se3 <- sd(democrats$incitpols) / sqrt(n_dems)

# Creating plotting matrix of means and 95% confidence-interval half-widths
plotting_dems3 <- data.frame(diffs = as.factor(c("Ordinary Dems. - Dem. Party",
                                                 "Dem. Politicians - Dem. Party",
                                                 "Ordinary Dems. - Dem. Politicians")),
                             means = c(mean(democrats$incitparty),
                                       mean(democrats$inpolparty),
                                       mean(democrats$incitpols)),
                             ci = t_crit * c(se1, se2, se3))

# Re-levelling the contrast labels to fix their left-to-right order in the
# plot (labels are left equal to the levels)
plotting_dems3$diffs2 <- factor(plotting_dems3$diffs,
                                levels=c("Ordinary Dems. - Dem. Party",
                                         "Ordinary Dems. - Dem. Politicians",
                                         "Dem. Politicians - Dem. Party"))

# Creating plot
ggplot(plotting_dems3, aes(x=diffs2, y = means)) +
  geom_point() +
  geom_errorbar(aes(ymin = means - ci, ymax = means + ci, width = .1)) +
  geom_hline(yintercept=0, linetype = "dashed")+
  scale_y_continuous(breaks = c(-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4),
                     limits = c(-8,4)) +
  labs(title = " ",
       x = " ",
       y = "Means and 95% Confidence Intervals") + 
  theme_bw() +
  theme(axis.text.x = element_text(size=12))


# Figure 6: Shows within-subject differences in Republicans' feelings toward
# in-group party, pols, and cits

# Sample size and t critical value are derived from the data instead of
# being hard-coded, so the intervals stay correct if the sample changes
n_reps <- nrow(republicans)
t_crit <- qt(.975, df = n_reps - 1)

# Standard errors
se1 <- sd(republicans$incitparty) / sqrt(n_reps)
se2 <- sd(republicans$inpolparty) / sqrt(n_reps)
se3 <- sd(republicans$incitpols) / sqrt(n_reps)

# Creating plotting matrix of means and 95% confidence-interval half-widths
plotting_reps3 <- data.frame(diffs = c("Ordinary Reps. - Rep. Party",
                                       "Rep. Politicians - Rep. Party",
                                       "Ordinary Reps. - Rep. Politicians"),
                             means = c(mean(republicans$incitparty),
                                       mean(republicans$inpolparty),
                                       mean(republicans$incitpols)),
                             ci = t_crit * c(se1, se2, se3))

# Creating plot
ggplot(plotting_reps3, aes(x=diffs, y = means)) +
  geom_point() +
  geom_errorbar(aes(ymin = means - ci, ymax = means + ci, width = .1)) +
  geom_hline(yintercept=0, linetype = "dashed") +
  scale_y_continuous(breaks = c(-11,-10,-9,-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6),
                     limits = c(-11,6)) +
  labs(title = " ",
       x = " ",
       y = "Means and 95% Confidence Intervals") +
  theme_bw() +
  theme(axis.text.x = element_text(size=12))


# Corresponding t-tests for Figures 5 and 6
# One-sample t-tests on the in-group difference scores; each tests whether
# the mean within-subject difference departs from zero (t.test's default
# mu = 0).

# Democrats
t.test(democrats$inpolparty)
t.test(democrats$incitparty) 
t.test(democrats$incitpols)

# Republicans
t.test(republicans$inpolparty)
t.test(republicans$incitparty) 
t.test(republicans$incitpols)


# Mean in-group bias by party, first measured with the party thermometers
# and then with the thermometers about ordinary partisans
# (compared in the Discussion section)
with(democrats, mean(bias_party))
with(republicans, mean(bias_party))
with(democrats, mean(bias_cits))
with(republicans, mean(bias_cits))



### Analyses in Appendix ###

## National representativeness of sample ##

# Data in this study is approximately nationally representative on sex, 
# race/ethnicity, age, and region. Lucid benchmarks their sample to the 2010 
# adult population, so here I compare my sample to these benchmarks. 
# National benchmarks for these variables were obtained using 
# https://www.socialexplorer.com/. 

# Relevant benchmark data is used to perform t-tests comparing my sample to a 
# national sample. First, I will create the benchmarks, before going through 
# each variable one at a time. The basic approach is to take the total 
# number of people in each category and subtract out those who are under 18:

# First, get total adult population: the overall total minus the four
# counts of people under 18
adults10 <- 308745538 - sum(20201362, 20348657, 20677194, 12954254)

# Proportion of adults in a category: the category's total count minus its
# under-18 counts, divided by the adult population
adult_share <- function(total, under18) {
  (total - sum(under18)) / adults10
}

# Sex
## Proportion male of those over 18
bench_male10 <- sum(36293724, 60179506, 17362960) / adults10

## Proportion female of those over 18
bench_female10 <- sum(35442312, 62380545, 22905024) / adults10

# Race
bench_white10 <- adult_share(223553265,
                             c(12795675, 13293799, 13737332, 8591543))
bench_black10 <- adult_share(38929319,
                             c(2902590, 2882597, 3034266, 2021863))
bench_amind10 <- adult_share(2932248,
                             c(244615, 243259, 245049, 155449))
bench_asian10 <- adult_share(14674252,
                             c(898011, 928248, 881590, 543787))
bench_pacific10 <- adult_share(540013,
                               c(44991, 43267, 42387, 26959))
bench_other10 <- adult_share(19107368,
                             c(1917696, 1784074, 1701946, 1052078))
bench_tworaces10 <- adult_share(9009073,
                                c(1397784, 1173413, 1034624, 562575))

# Hispanic/Latino
bench_hispanic10 <- adult_share(50477594,
                                c(5114488, 4790771, 4525242, 2700390))

# Age (these counts are already adult age groups, so nothing is subtracted)
bench_1824_10 <- 30672088 / adults10
bench_2534_10 <- 41063948 / adults10
bench_3544_10 <- 41070606 / adults10
bench_4564_10 <- sum(45006716, 36482729) / adults10
bench_65over_10 <- sum(21713429, 13061122, 5493433) / adults10

# Region
bench_northeast10 <- adult_share(55317240,
                                 c(3224299, 3337564, 3500914, 2270415))
bench_midwest10 <- adult_share(66927001,
                               c(4334717, 4432143, 4522972, 2838276))
bench_south10 <- adult_share(114555744,
                             c(7671941, 7674359, 7701070, 4741387))
bench_west10 <- adult_share(71945553,
                            c(4970405, 4904591, 4952238, 3104176))


# Now that I have benchmarks created, I can compare my sample to these 

# Sex
# Distribution of the raw gender codes in the full sample
prop.table(table(lucid$gender))
# Binary indicators: male is gender code 1, female is gender code 2
lucid$male <- as.numeric(lucid$gender == 1)
lucid$female <- as.numeric(lucid$gender == 2)
# One-sample, two-sided t-tests (t.test's default alternative) of the sample
# proportions against the benchmarks
t.test(lucid$male, mu = bench_male10)
t.test(lucid$female, mu = bench_female10)


# Race/Ethnicity
# Unfortunately, this variable is not perfectly comparable between the Census 
# and the embedded data received from Lucid. On the Census, people can select 
# "two or more races." This option is not available with Lucid's question. 
# But on Lucid, people can select "Prefer not to answer," which is not an 
# option on the Census. On the Census, the estimated percentage of people who 
# are "two or more races" is 2.1%, while 2.3% of my sample selected 
# "Prefer not to answer." Fortunately, because these are so close, 
# I can treat the Lucid data as if it was the same exact question as the Census.
# Here, I simply show there is no statistically distinguishable difference 
# between the percentage of people in my sample who "preferred not to answer" 
# from the Census benchmark of those who belong to two or more races:

# Recoding data from Lucid to reflect higher level categories
# (Lucid data included smaller sub-categories of American Indian and Asian).
# Ethnicity codes: 1 = White, 2 = Black, 3 = Am. Ind., 4-10 = Asian,
# 11-14 = Pacific Is., 15 = Some other race. Every other non-missing code is
# labelled "Prefer not to answer"; missing codes stay missing.
eth_code <- lucid$ethnicity
race_label <- ifelse(is.na(eth_code), NA_character_, "Prefer not to answer")
race_label[which(eth_code == 1)] <- "White"
race_label[which(eth_code == 2)] <- "Black"
race_label[which(eth_code == 3)] <- "Am. Ind."
race_label[eth_code %in% 4:10] <- "Asian"
race_label[eth_code %in% 11:14] <- "Pacific Is."
race_label[which(eth_code == 15)] <- "Some other race"
lucid$race <- race_label

# Creating a 1/0 indicator for each category to conduct t-tests with
lucid$white <- as.numeric(lucid$race == "White")
lucid$black <- as.numeric(lucid$race == "Black")
lucid$amind <- as.numeric(lucid$race == "Am. Ind.")
lucid$asian <- as.numeric(lucid$race == "Asian")
lucid$pacific <- as.numeric(lucid$race == "Pacific Is.")
lucid$other <- as.numeric(lucid$race == "Some other race")
lucid$noanswer <- as.numeric(lucid$race == "Prefer not to answer")

# Testing if the percentage of people who preferred not to answer is
# statistically distinguishable from 2.1%, which is the percentage of 
# the U.S. population that is "two or more races"
# (one-sample t-test of the 1/0 indicator against the benchmark proportion)
t.test(lucid$noanswer, mu = bench_tworaces10, alternative = "two.sided")

# Because the percentage of Americans who are two or more races on the Census 
# is within the confidence interval of the percentage of my sample who 
# preferred not to answer, in this analysis I am going to treat them as if 
# they were identical items on each survey. It is impossible to tell with 
# this data the extent to which having one of these options rather than the 
# other might structure how people answer the question as a whole, and it 
# inevitably does. However, in this case, any differences caused by the 
# differential items do not make my sample look statistically different 
# than the 2010 Census in any category of racial identification:
# (one one-sample t-test per race indicator against its Census benchmark)
t.test(lucid$white, mu = bench_white10, alternative = "two.sided")
t.test(lucid$black, mu = bench_black10, alternative = "two.sided")
t.test(lucid$amind, mu = bench_amind10, alternative = "two.sided")
t.test(lucid$asian, mu = bench_asian10, alternative = "two.sided")
t.test(lucid$pacific, mu = bench_pacific10, alternative = "two.sided")
t.test(lucid$other, mu = bench_other10, alternative = "two.sided")


# Hispanic/Latino
# Categories 1 and 15 indicate not hispanic or prefer not to answer
prop.table(table(lucid$hispanic))

# Dichotomous indicator from the Lucid measure: 0 for categories 1 and 15,
# 1 for every other category
not_latino <- lucid$hispanic == 1 | lucid$hispanic == 15
lucid$latino <- as.numeric(!not_latino)

# One-sample, two-sided t-test (t.test's default alternative) comparing the
# share of latinos in my sample to the 2010 Census benchmark
t.test(lucid$latino, mu = bench_hispanic10)


## Age
# Lucid uses age groups of 18-24, 25-34, 35-44, 45-64, and 65 and over to
# create their benchmarks on the 2010 Census, so the same groups are built
# here as 1/0 indicators from the age variable.
respondent_age <- lucid$age
lucid$age1 <- as.numeric(respondent_age < 25)
lucid$age2 <- as.numeric(respondent_age > 24 & respondent_age < 35)
lucid$age3 <- as.numeric(respondent_age > 34 & respondent_age < 45)
lucid$age4 <- as.numeric(respondent_age > 44 & respondent_age < 65)
lucid$age5 <- as.numeric(respondent_age >= 65)

# One-sample, two-sided t-tests (t.test's default alternative) of each age
# group's sample share against its benchmark
t.test(lucid$age1, mu = bench_1824_10)
t.test(lucid$age2, mu = bench_2534_10)
t.test(lucid$age3, mu = bench_3544_10)
t.test(lucid$age4, mu = bench_4564_10)
t.test(lucid$age5, mu = bench_65over_10)

# Region
# My sample slightly over-represents the Midwest, but otherwise is in
# line with national representativeness by region.

# One 1/0 indicator per region, named after the region and keyed by its
# code in the region variable
region_codes <- c(northeast = 1, midwest = 2, south = 3, west = 4)
for (region_name in names(region_codes)) {
  lucid[[region_name]] <- as.numeric(lucid$region == region_codes[[region_name]])
}

# One-sample, two-sided t-tests (t.test's default alternative) of each
# region's sample share against its benchmark
t.test(lucid$northeast, mu = bench_northeast10)
t.test(lucid$midwest, mu = bench_midwest10)
t.test(lucid$south, mu = bench_south10)
t.test(lucid$west, mu = bench_west10)


## Bootstrapping estimates of within-subject differences in FT out-group ##

# Democrats

# Fixing the random seed so the bootstrap draws (and therefore Figure A1)
# are reproducible from run to run; the seed value itself is arbitrary
set.seed(20190101)

# Number of bootstrap replicates and size of each resample (the number of
# Democrats in the data)
n_boot <- 1000
n_dems <- nrow(democrats)

# Pre-allocated vectors to store the estimate from each replicate
citizen_pols_diffs <- numeric(n_boot)
citizen_party_diffs <- numeric(n_boot)
pols_party_diffs <- numeric(n_boot)

# Drawing n_boot samples of the Democrats with replacement
for (i in seq_len(n_boot)) {
  sample_dems <- democrats[sample(n_dems, size = n_dems, replace = TRUE), ]

  # Calculating the mean difference in FT scores toward the Republican
  # reference groups for each sample, and storing these in the vectors
  citizen_pols_diffs[i] <- mean(sample_dems$citpols)
  citizen_party_diffs[i] <- mean(sample_dems$citparty)
  pols_party_diffs[i] <- mean(sample_dems$polparty)
}

# Figure A1: Creating plot using bootstrapped estimates for Democrats

# Creating plotting matrix: mean of the bootstrap estimates, with the 2.5th
# and 97.5th percentiles of the estimates as the interval bounds
bootstrap_plot_dems <- data.frame(diffs = c("Ordinary Reps. - Rep. Party", 
                                            "Rep. Politicians - Rep. Party",
                                            "Ordinary Reps. - Rep. Politicians"),
                                  means = c(mean(citizen_party_diffs), 
                                            mean(pols_party_diffs),
                                            mean(citizen_pols_diffs)),
                                  hi = c(quantile(citizen_party_diffs, .975), 
                                         quantile(pols_party_diffs, .975),
                                         quantile(citizen_pols_diffs, .975)),
                                  lo = c(quantile(citizen_party_diffs, .025), 
                                         quantile(pols_party_diffs, .025),
                                         quantile(citizen_pols_diffs, .025)))

# Creating plot
ggplot(bootstrap_plot_dems, aes(x=diffs, y = means)) +
  geom_point() +
  geom_errorbar(aes(ymin = lo, ymax = hi, width = .1)) +
  geom_hline(yintercept=0, linetype = "dashed") +
  scale_y_continuous(breaks = c(-3,-2,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13),
                     limits = c(-3,13)) +
  labs(title = " ",
       x = " ",
       y = "Means and 95% Confidence Intervals") +
  theme_bw() +
  theme(axis.text.x = element_text(size=12))


# Republicans

# Fixing the random seed so the bootstrap draws (and therefore Figure A2)
# are reproducible from run to run; the seed value itself is arbitrary
set.seed(20190102)

# Number of bootstrap replicates and size of each resample (the number of
# Republicans in the data)
n_boot <- 1000
n_reps <- nrow(republicans)

# Pre-allocated vectors to store the estimate from each replicate
citizen_pols_diffs <- numeric(n_boot)
citizen_party_diffs <- numeric(n_boot)
pols_party_diffs <- numeric(n_boot)

# Drawing n_boot samples of the Republicans with replacement
for (i in seq_len(n_boot)) {
  sample_reps <- republicans[sample(n_reps, size = n_reps, replace = TRUE), ]

  # Calculating the mean difference in FT scores toward the Democratic
  # reference groups for each sample, and storing these in the vectors
  citizen_pols_diffs[i] <- mean(sample_reps$citpols)
  citizen_party_diffs[i] <- mean(sample_reps$citparty)
  pols_party_diffs[i] <- mean(sample_reps$polparty)
}

# Figure A2: Creating plot using bootstrapped estimates for Republicans

# Creating plotting matrix: mean of the bootstrap estimates, with the 2.5th
# and 97.5th percentiles of the estimates as the interval bounds
bootstrap_plot_reps <- data.frame(diffs = c("Ordinary Dems. - Dem. Party", 
                                            "Dem. Politicians - Dem. Party",
                                            "Ordinary Dems. - Dem. Politicians"),
                                  means = c(mean(citizen_party_diffs), 
                                            mean(pols_party_diffs),
                                            mean(citizen_pols_diffs)),
                                  hi = c(quantile(citizen_party_diffs, .975), 
                                         quantile(pols_party_diffs, .975),
                                         quantile(citizen_pols_diffs, .975)),
                                  lo = c(quantile(citizen_party_diffs, .025), 
                                         quantile(pols_party_diffs, .025),
                                         quantile(citizen_pols_diffs, .025)))

# Re-ordering the contrasts for the x-axis
bootstrap_plot_reps$diffs2 <- factor(bootstrap_plot_reps$diffs, 
                                     levels=c("Ordinary Dems. - Dem. Party",
                                              "Ordinary Dems. - Dem. Politicians",
                                              "Dem. Politicians - Dem. Party"), 
                                     labels=c("Ordinary Dems. - Dem. Party",
                                              "Ordinary Dems. - Dem. Politicians",
                                              "Dem. Politicians - Dem. Party"))
# Creating plot
ggplot(bootstrap_plot_reps, aes(x=diffs2, y = means)) +
  geom_point() +
  geom_errorbar(aes(ymin = lo, ymax = hi, width = .1)) +
  geom_hline(yintercept=0, linetype = "dashed") +
  scale_y_continuous(breaks = c(-3,-2,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13),
                     limits = c(-3,13)) +
  labs(title = " ",
       x = " ",
       y = "Means and 95% Confidence Intervals") +
  theme_bw() +
  theme(axis.text.x = element_text(size=12))


## Modeling partisans' preferences for their own party compared to ordinary 
## members of their own party

# New dependent variable: feelings toward the in-party minus feelings toward
# ordinary citizens in the in-party, added to each party's data set
democrats$inpartycit <- with(democrats, in_party - in_cits)
republicans$inpartycit <- with(republicans, in_party - in_cits)

# model for democrats and results
# Linear model (lm) of inpartycit on the strong and leaner indicators,
# ideology (ideo), and the female and white indicators
dems_mod <- lm(inpartycit~strong + leaner + ideo + female + white,
               data = democrats)
summary(dems_mod)

# model for republicans and results
# Same specification, fit to the Republican data set
reps_mod <- lm(inpartycit~strong + leaner + ideo + female + white,
               data = republicans)
summary(reps_mod)

# creating table in Latex
# Republican model is passed first, Democratic model second
stargazer(reps_mod, dems_mod)

